# YAML training configuration for the MossFormer2_SS_16K network (this file is not a shell script).
# Run mode and device selection
mode: 'train'
use_cuda: 1 # 1 for True, 0 for False

sampling_rate: 16000 # presumably in Hz, matching the "16K" suffix of the network name — confirm
network: "MossFormer2_SS_16K"  # network type
# MossFormer2 parameters
num_spks: 2 # presumably the number of speakers (output streams) — confirm against the model code
encoder_kernel_size: 16
encoder_embedding_dim: 512
mossformer_sequence_dim: 512
num_mossformer_layer: 24

# Train
load_type: 'one_input_multi_outputs'
# NOTE(review): tr_list / cv_list look like the training and validation .scp list files — confirm
tr_list: 'data/tr_wsj0_2mix_16k.scp'
cv_list: 'data/cv_wsj0_2mix_16k.scp'

init_learning_rate: 0.00015 # learning rate for a new training run
finetune_learning_rate: 0.00005 # learning rate for a fine-tuning run

loss_threshold: -9999.0 # threshold used to prevent a very low loss
max_epoch: 120

weight_decay: 0.00001
clip_grad_norm: 10.

# Log
# NOTE(review): this section only holds the seed; no logging options are defined here
seed: 777

# Dataset
num_workers: 4
batch_size: 1
accu_grad: 1  # accumulate gradients over multiple batches before one back-propagation update
effec_batch_size: 4   # per GPU; only used if accu_grad is set to 1; must be a multiple of batch_size
max_length: 2         # truncate the utterances in the dataloader, in seconds

